Una comparación de metodologías analíticas en el Partido de La Plata
3.1 Resumen Ejecutivo
3.2 Motivos y Objetivos
3.3 Datos, Metodología y Limitaciones
3.4 Resultados
Mostrar el código
import pandas as pdimport geopandas as gpdimport requestsfrom io import StringIOimport boto3import duckdbimport matplotlib.pyplot as pltimport numpy as npimport s2spherefrom botocore.config import Configfrom rasterstats import zonal_statsfrom shapely.geometry import boxUSE_CRS ="EPSG:5349"
Mostrar el código
response = requests.get("https://www.argentina.gob.ar/sites/default/files/renabap-2023-12-06.geojson")renabap = gpd.read_file(StringIO(response.text))renabap_pba = renabap[renabap["provincia"] =="Buenos Aires"]renabap_pba = renabap_pba.to_crs(USE_CRS)peligro_path ="/home/nissim/Documents/dev/ciut-inundaciones/data/la_plata_pelig_2023_datos_originales.geojson"peligro = gpd.read_file(peligro_path)peligro = peligro.to_crs(USE_CRS)# Get the bounds of the peligro layerpeligro_bounds = peligro.total_boundspeligro_bbox = box(*peligro_bounds)# Filter renabap_pba to only include geometries that intersect with the peligro boundsrenabap_pba_intersect = renabap_pba[ renabap_pba.geometry.intersects(peligro_bbox)].copy()# make sure all geometries are validrenabap_pba_intersect = renabap_pba_intersect[renabap_pba_intersect.geometry.is_valid]
3.4.1 Interpolación areal
Mostrar el código
import geopandas as gpdfrom tobler.area_weighted import area_interpolate# Ensure both GeoDataFrames have the same CRSif renabap_pba_intersect.crs != peligro.crs: peligro = peligro.to_crs(renabap_pba_intersect.crs)# Get unique hazard levelshazard_levels = peligro["PELIGROSID"].unique()# Initialize output columns in renabap_pba_intersectfor level in hazard_levels: renabap_pba_intersect[f"porcion_{level}"] =0.0# For each hazard level, calculate the portion of each barrio that falls within itfor level in hazard_levels:# Filter hazard polygons for this level hazard_subset = peligro[peligro["PELIGROSID"] == level].copy()ifnot hazard_subset.empty:# Add dummy variable with value 1 for each hazard polygon hazard_subset["hazard_area"] =1# Interpolate hazard area to barrios (this gives us the proportion) results = area_interpolate( source_df=hazard_subset, target_df=renabap_pba_intersect, extensive_variables=["hazard_area"], )# This gives us the portion of each barrio that overlaps with this hazard level renabap_pba_intersect[f"porcion_{level}"] = results["hazard_area"]# Calculate families exposed to each hazard levelfor level in hazard_levels: renabap_pba_intersect[f"familias_expuestas_{level}"] = ( renabap_pba_intersect[f"porcion_{level}"]* renabap_pba_intersect["familias_aproximadas"] )# Create tidy dataframe with the three required columnsrenabap_tidy = renabap_pba_intersect[["nombre_barrio"]].copy()# Calculate total familias_expuestasrenabap_tidy["fam_expuestas_areal"] = ( renabap_pba_intersect["familias_expuestas_alta"]+ renabap_pba_intersect["familias_expuestas_baja"]+ renabap_pba_intersect["familias_expuestas_media"])# Determine highest hazard level (peligrosidad)def get_highest_hazard(row): exposures = [ row["familias_expuestas_alta"], row["familias_expuestas_baja"], row["familias_expuestas_media"], ] hazard_levels = ["alta", "baja", "media"]return hazard_levels[exposures.index(max(exposures))]renabap_tidy["peligrosidad"] = renabap_pba_intersect.apply(get_highest_hazard, axis=1)# Round fam_expuestas_arealrenabap_tidy["fam_expuestas_areal"] = renabap_tidy["fam_expuestas_areal"].round(2)
3.4.2 Mapeo dasymetrico con datos GHSL
Mostrar el código
import rasterstatsimport rioxarrayfrom shapely.geometry import box# Load GHSL data with dask chunking for memory efficiencyghsl = rioxarray.open_rasterio("/home/nissim/Downloads/spatial/GHS_POP_E2025_GLOBE_R2023A_54009_100_V1_0_R14_C13/GHS_POP_E2025_GLOBE_R2023A_54009_100_V1_0_R14_C13.tif", chunks={"x": 1024, "y": 1024}, # Adjust chunk size based on your memory constraints)# Reproject to your target CRS with streamingghsl = ghsl.rio.reproject(dst_crs=USE_CRS)# Clip to renabap_pba_intersect bounding box using streamingbounding_box = box(*renabap_pba_intersect.total_bounds) # Create a box from the bounding box coordinatesghsl_clipped = ghsl.rio.clip( [bounding_box], # Use the bounding box as a geometry (wrapped in a list) from_disk=True, # Process from disk to avoid loading entire dataset into memory)# Convert to the format expected by rasterstatsgeometries = [geom for geom in renabap_pba_intersect.geometry]# Use rasterstats for vectorized zonal statisticsstats = rasterstats.zonal_stats( geometries, ghsl_clipped.values[0], # rasterstats expects 2D array affine=ghsl_clipped.rio.transform(), stats=["sum"], nodata=ghsl_clipped.rio.nodata,)# Extract the sum valuesghsl_totals = [stat["sum"] if stat["sum"] isnotNoneelse0for stat in stats]# Add the GHSL population estimates as a new columnrenabap_pba_intersect["ghsl_pop_est"] = ghsl_totalsfrom rasterio.features import rasterizeimport numpy as np# Get the reference raster properties from GHSL datareference_raster = ghsl_clippedreference_transform = reference_raster.rio.transform()reference_crs = reference_raster.rio.crsreference_shape = reference_raster.shape[1:] # Get 2D shape (height, width)# Prepare geometries and values for rasterizationgeometries_ghsl = [ (geom, value)for geom, value inzip( renabap_pba_intersect.geometry, renabap_pba_intersect["ghsl_pop_est"] )]geometries_familias = [ (geom, value)for geom, value inzip( renabap_pba_intersect.geometry, renabap_pba_intersect["familias_aproximadas"] )]# Create GHSL population rasterghsl_pop_raster = rasterize( geometries_ghsl, out_shape=reference_shape, transform=reference_transform, fill=0, dtype=np.float32, all_touched=False,)# Create familias aproximadas rasterfamilias_raster = rasterize( geometries_familias, out_shape=reference_shape, transform=reference_transform, fill=0, dtype=np.float32, all_touched=False,)# Step 1: Divide original GHSL by the barrio-level GHSL to get fractional population# Handle division by zero and nodata values properlymask = (ghsl_clipped.values[0] >0) & (ghsl_pop_raster >0.1)ghsl_fractional = np.full_like(ghsl_clipped.values[0], -200, dtype=np.float64)ghsl_fractional[mask] = ghsl_clipped.values[0][mask] / ghsl_pop_raster[mask]# Step 2: Multiply fractional population by familias aproximadas to get downscaled datamask2 = (ghsl_fractional >0) & (familias_raster >0)familias_downscaled = np.full_like(ghsl_clipped.values[0], -200, dtype=np.float64)familias_downscaled[mask2] = ghsl_fractional[mask2] * familias_raster[mask2]# Check that the sum of downscaled familias equals the original familias aproximadastotal_original_familias = renabap_pba_intersect["familias_aproximadas"].sum()valid_downscaled = familias_downscaled[familias_downscaled !=-200]total_downscaled_familias = np.sum(valid_downscaled)# Step 1: Divide original GHSL by the barrio-level GHSL to get fractional population# Use masking to avoid division on invalid cellsmask = (ghsl_clipped.values[0] !=-200) & (ghsl_pop_raster >0.1)ghsl_fractional = np.full_like(ghsl_clipped.values[0], -200, dtype=np.float64)ghsl_fractional[mask] = ghsl_clipped.values[0][mask] / ghsl_pop_raster[mask]# Step 2: Multiply fractional population by familias aproximadas to get downscaled datamask2 = (ghsl_fractional !=-200) & (familias_raster >0)familias_downscaled = np.full_like(ghsl_clipped.values[0], -200, dtype=np.float64)familias_downscaled[mask2] = ghsl_fractional[mask2] * familias_raster[mask2]# Verify the results - exclude -200 from range calculationsghsl_valid = ghsl_clipped.values[0] !=-200fractional_valid = ghsl_fractional !=-200downscaled_valid = familias_downscaled !=-200# Check that the sum of downscaled familias equals the original familias aproximadastotal_original_familias = renabap_pba_intersect["familias_aproximadas"].sum()total_downscaled_familias = np.sum(familias_downscaled[downscaled_valid])print(f"\nTotal original familias: {total_original_familias:,.0f}")print(f"Total downscaled familias: {total_downscaled_familias:,.0f}")print(f"Difference: {abs(total_original_familias - total_downscaled_familias):,.2f}")# Intersect settlements with hazard zonessettlement_hazard = gpd.overlay(renabap_pba_intersect, peligro, how="intersection")# Create GHSL tidy dataframe with matching structureghsl_tidy = []for idx, row in settlement_hazard.iterrows(): stats = zonal_stats( [row.geometry], familias_downscaled, # your numpy array affine=reference_transform, # get transform from your xarray stats=["sum"], nodata=-200, # use your actual nodata value )[0] ghsl_tidy.append( {"nombre_barrio": row["nombre_barrio"],"peligrosidad": row["PELIGROSID"],"fam_expuestas_ghsl": stats["sum"] if stats["sum"] isnotNoneelse0, } )ghsl_tidy = pd.DataFrame(ghsl_tidy)
Total original familias: 88,856
Total downscaled familias: 88,680
Difference: 176.00
3.4.3 Estimaciones según cantidad de edificios
Mostrar el código
def fetch_buildings(geodataframe, temp_file="buildings_filtered.parquet"):"""Fetch building data for a given GeoDataFrame region"""# Get S2 cell and bounds center = geodataframe.to_crs("epsg:3857").union_all().centroid center_wgs84 = ( gpd.GeoDataFrame(geometry=[center], crs="EPSG:3857") .to_crs(epsg=4326) .geometry.iloc[0] ) cell = s2sphere.CellId.from_lat_lng( s2sphere.LatLng.from_degrees(center_wgs84.y, center_wgs84.x) ).parent(10) bounds = geodataframe.to_crs("epsg:4326").total_bounds# Find matching S2 partition s3 = boto3.client("s3", endpoint_url="https://data.source.coop", aws_access_key_id="", aws_secret_access_key="", config=Config(s3={"addressing_style": "path"}), ) partitions = { obj["Key"].split("/")[-1].replace(".parquet", "")for obj in s3.list_objects_v2( Bucket="vida", Prefix="google-microsoft-open-buildings/geoparquet/by_country_s2/country_iso=ARG/", ).get("Contents", []) } parent_id =next(str(cell.parent(level).id())for level inrange(10, 0, -1)ifstr(cell.parent(level).id()) in partitions )# Setup DuckDB and query con = duckdb.connect()for cmd in ["INSTALL spatial","LOAD spatial","INSTALL httpfs","LOAD httpfs","SET s3_region='us-east-1'","SET s3_endpoint='data.source.coop'","SET s3_use_ssl=true","SET s3_url_style='path'", ]: con.execute(cmd)# Export and read back query =f""" COPY (SELECT * FROM 's3://vida/google-microsoft-open-buildings/geoparquet/by_country_s2/country_iso=ARG/{parent_id}.parquet' WHERE bbox.xmax >= {bounds[0]} AND bbox.xmin <= {bounds[2]} AND bbox.ymax >= {bounds[1]} AND bbox.ymin <= {bounds[3]} ) TO '{temp_file}' (FORMAT PARQUET); """ con.execute(query) df = pd.read_parquet(temp_file) df["geometry"] = gpd.GeoSeries.from_wkb(df["geometry"])return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")# Usage:buildings = fetch_buildings(renabap_pba_intersect)# Reproject buildings to match the analysis CRSbuildings_proj = buildings.to_crs(USE_CRS)# Step 1: Calculate buildings per settlement-hazard intersectionbuildings_hazard = gpd.overlay(buildings_proj, settlement_hazard, how="intersection")# Count buildings per settlement-hazard combinationbuildings_per_hazard = ( buildings_hazard.groupby(["nombre_barrio", "PELIGROSID"]) .size() .reset_index(name="buildings_count"))# Step 2: Calculate total buildings per settlement (barrio popular)buildings_settlement = gpd.overlay( buildings_proj, renabap_pba_intersect, how="intersection")total_buildings_per_settlement = ( buildings_settlement.groupby("nombre_barrio") .size() .reset_index(name="total_buildings"))# Step 3: Merge and calculate ratioshazard_ratios = buildings_per_hazard.merge( total_buildings_per_settlement, on="nombre_barrio", how="left")hazard_ratios["building_ratio"] = ( hazard_ratios["buildings_count"] / hazard_ratios["total_buildings"])# Step 4: Get total population per settlement and apply ratiossettlement_population = renabap_pba_intersect[ ["nombre_barrio", "familias_aproximadas"]].copy()# Merge with ratios and calculate population estimatespopulation_estimates = hazard_ratios.merge( settlement_population, on="nombre_barrio", how="left")population_estimates["estimated_population_hazard"] = ( population_estimates["building_ratio"]* population_estimates["familias_aproximadas"])# Step 5: Create final results with totalsfinal_results = population_estimates[ ["nombre_barrio", "PELIGROSID", "estimated_population_hazard"]].copy()# Add total population rows (no hazard breakdown)total_pop_rows = settlement_population.copy()total_pop_rows["PELIGROSID"] ="total"total_pop_rows["estimated_population_hazard"] = total_pop_rows["familias_aproximadas"]# Combinefinal_results = pd.concat( [ final_results, total_pop_rows[["nombre_barrio", "PELIGROSID", "estimated_population_hazard"]], ], ignore_index=True,)# Create buildings tidy dataframe with matching structurebuildings_tidy = final_results[ ["nombre_barrio", "PELIGROSID", "estimated_population_hazard"]].copy()# Rename columns to match the structurebuildings_tidy = buildings_tidy.rename( columns={"PELIGROSID": "peligrosidad","estimated_population_hazard": "fam_expuestas_edificios", })# Filter out the 'total' rows since we only want hazard-specific databuildings_tidy = buildings_tidy[buildings_tidy["peligrosidad"] !="total"].copy()
3.4.4 Comparación de resultados
Mostrar el código
# Join all three dataframes by nombre_barrio and peligrosidadfinal_df = renabap_tidy.merge( ghsl_tidy, on=["nombre_barrio", "peligrosidad"], how="outer")final_df = final_df.merge( buildings_tidy, on=["nombre_barrio", "peligrosidad"], how="outer")# Impute 0s for NA values in fam_expuestas columnsfam_expuestas_columns = [col for col in final_df.columns if'fam_expuestas'in col]final_df[fam_expuestas_columns] = final_df[fam_expuestas_columns].fillna(0)# Create long format dataframe with aggregationfinal_tidy = []# Add renabap datafor _, row in renabap_tidy.iterrows(): final_tidy.append( {"nombre_barrio": row["nombre_barrio"],"peligrosidad": row["peligrosidad"],"metodo": "area","fam_expuestas": row["fam_expuestas_areal"], } )# Add ghsl datafor _, row in ghsl_tidy.iterrows(): final_tidy.append( {"nombre_barrio": row["nombre_barrio"],"peligrosidad": row["peligrosidad"],"metodo": "ghsl","fam_expuestas": row["fam_expuestas_ghsl"], } )# Add buildings datafor _, row in buildings_tidy.iterrows(): final_tidy.append( {"nombre_barrio": row["nombre_barrio"],"peligrosidad": row["peligrosidad"],"metodo": "edificios","fam_expuestas": row["fam_expuestas_edificios"], } )final_tidy = pd.DataFrame(final_tidy)# Aggregate to get one observation per barrio per hazard level per methodfinal_tidy = ( final_tidy.groupby(["nombre_barrio", "peligrosidad", "metodo"])["fam_expuestas"] .sum() .reset_index())# Create complete combination of all barrios, hazard levels, and methodsall_barrios = final_tidy["nombre_barrio"].unique()all_hazard_levels = ["alta", "baja", "media"]all_methods = ["area", "ghsl", "edificios"]complete_combinations = pd.DataFrame([ {"nombre_barrio": barrio, "peligrosidad": hazard, "metodo": method}for barrio in all_barriosfor hazard in all_hazard_levelsfor method in all_methods])# Merge with actual data and fill missing values with 0final_tidy = complete_combinations.merge( final_tidy, on=["nombre_barrio", "peligrosidad", "metodo"], how="left")final_tidy["fam_expuestas"] = final_tidy["fam_expuestas"].fillna(0)# Calculate total exposure per hazard level per methodsummary = ( final_tidy.groupby(["peligrosidad", "metodo"])["fam_expuestas"] .sum() .reset_index() .pivot(index="peligrosidad", columns="metodo", values="fam_expuestas"))print("Total Familias Expuestas por Peligrosidad y Método:")print("="*50)print(summary.round(2))import matplotlib.pyplot as pltimport seaborn as sns# Filter for high exposure (alta peligrosidad)alta_data = final_tidy[final_tidy["peligrosidad"] =="alta"].copy()# Calculate total exposure per barrio across all methodstotal_exposure = ( alta_data.groupby("nombre_barrio")["fam_expuestas"] .sum() .sort_values(ascending=False))top_25_barrios = total_exposure.head(25).index# Filter data for top 25 barriostop_25_data = alta_data[ alta_data["nombre_barrio"].isin(top_25_barrios)].copy()# Create range plot showing min, max, and individual pointsplt.figure(figsize=(15, 10))# Define colors for methodsmethod_colors = {"area": "blue", "ghsl": "red", "edificios": "green"}for i, barrio inenumerate(top_25_barrios): barrio_data = top_25_data[top_25_data["nombre_barrio"] == barrio]iflen(barrio_data) >0: values = barrio_data["fam_expuestas"].values min_val = values.min() max_val = values.max()# Plot range line plt.plot([min_val, max_val], [i, i], "k-", alpha=0.5, linewidth=2)# Plot individual points colored by methodfor _, row in barrio_data.iterrows(): color = method_colors[row["metodo"]] plt.plot(row["fam_expuestas"], i, "o", color=color, markersize=6, alpha=0.8)plt.yticks(range(len(top_25_barrios)), top_25_barrios)plt.xlabel("Familias Expuestas")plt.ylabel("Barrio")plt.title("Range of High Exposure Estimates for Top 25 Barrios", fontsize=14)plt.grid(True, alpha=0.3)# Add legendlegend_elements = [ plt.Line2D( [0], [0], marker="o", color="w", markerfacecolor=color, markersize=8, label=method, )for method, color in method_colors.items()]plt.legend(handles=legend_elements, title="Método")plt.tight_layout()plt.show()
Total Familias Expuestas por Peligrosidad y Método:
==================================================
metodo area edificios ghsl
peligrosidad
alta 1909.76 3606.85 2831.97
baja 5315.18 9852.58 7726.58
media 3599.65 9717.37 8400.36